* ---------------------------------------------------------------------------
* Entry script for building the patent-based country weights.
* Positional arguments (the literal ___EMPTY___ stands for: not supplied):
*   1  project root directory   (stored in global root_dir)
*   2  weight category          (stored in global weight_category)
*   3  weight versions          (stored in global weight_versions)
*   4  weight window            (stored in global weight_window)
*   5  patent-list type         (stored in global wtype)
* A global is only overwritten when the matching argument is non-empty, so
* values that are already set (presumably by config.do - TODO confirm) survive.
* ---------------------------------------------------------------------------
global root_dir = "`1'"

include "$root_dir/code/config/config.do"

* A log named dat that is still open from an interrupted earlier run would make
* the log using command below fail, and the failure would be swallowed by cap.
* Close any such leftover first; this is a no-op when no such log is open.
cap log close dat
* Path is quoted so that a log directory containing blanks still works.
cap noi log using "${log_dir}/weights.log", replace name(dat)

*Handle empty arguments: map the ___EMPTY___ placeholder to an empty string
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
    global wtype "$arg4"
}
* echo the patent-list type that will be used (printed even when argument 5 was empty)
di "${wtype}"

capture noi {



/* This do-file builds all patenting-weight versions for the independent variables */

************************************
* Load country list
************************************
* Runs the project country-list configuration. Presumably this defines the
* countrylist globals that the loops further down iterate over - TODO confirm
* in country_list.do.
do ${code_dir}/config/country_list.do

if "$weight_category" != "excluding" {


	* The pauto95 patent list is stored as pauto95_patents; re-save it under the
	* pats_ prefix so the restriction merges below can address every list the same way.
	if "pauto95" == "$wtype" {
		local patlist_dir ${dataset_dir}/patent_list
		use `patlist_dir'/${wtype}_patents, clear
		save `patlist_dir'/pats_${wtype}.dta, replace
	}



***********************************************************************************************************************
* Part 1: EPO only
***********************************************************************************************************************

***********************************************************************************************************************
* Deal with EPO patents not granted
***********************************************************************************************************************

* a) Average yearly distribution of EP national-phase entries across countries
*    (used for firms without any EP history of their own)
local patstat_dir ${commondata_dir}/patstat_2018b
use `patstat_dir'/PRS_EPO_national_phase.dta, clear
mmerge appln_id using `patstat_dir'/appln_info.dta, unmatched(master) ukeep(appln_year)
drop _merge

* optionally restrict to the patent list of the requested technical field (tfacit or pauto95)
if "$wtype" != "all" {
	mmerge appln_id using ${dataset_dir}/patent_list/pats_${wtype}.dta, unmatched(none)
	drop _merge
}

* number of rows per application year and country
tempvar one first_row
gen byte `one' = 1
bysort appln_year country : egen nb_pat = total(`one')

* number of distinct applications per year: flag one row per appln_id, then add up the flags
bysort appln_id : gen byte `first_row' = (_n == 1)
bysort appln_year : egen nb_EPO_pat_granted = total(`first_row')
drop appln_id `one' `first_row'

* reduce to distinct rows and express the country count as a share of the yearly application count
duplicates drop
gen share_EPC_country = nb_pat / nb_EPO_pat_granted
gen str2 appln_auth = "EP"
rename country EPC_country
compress
* yearly country shares
save ${dataset_dir}/weights/EP_grants_transfers_average_${wtype}.dta, replace

* yearly sum of the country shares, stored as EPweight
collapse (sum) EPweight = share_EPC_country, by(appln_year)
save ${dataset_dir}/weights/EP_grants_transfers_average_EPweights_${wtype}.dta, replace


* b) Compute a firm's previous EP transfer history

* Helper meant to be run on the data of a single firm (it is called through runby below):
* appends one empty row per year after the last observed year up to 2016, then
* balances the data over firm x year x country and removes the rows without a country.
* Rows created by fillin are flagged in the variable _fillin.
* NOTE(review): if the last observed year is above 2016 the requested number of
* observations is below _N; set obs presumably fails then - confirm how runby
* treats such a firm.
cap program drop rectangularize
program rectangularize
	* last application year present in the data
	qui summarize appln_year, meanonly
	local last_year = r(max)
	local n_before = _N
	* add 2016 minus last_year new observations and number their years consecutively
	qui set obs `= `n_before' + 2016 - `last_year''
	qui replace appln_year = `last_year' + (_n - `n_before') if _n > `n_before'
	* the new rows have no firm id yet: copy it from the first row
	qui replace BvD = BvD[1]
	fillin BvD appln_year country
	* the appended rows carried an empty country; drop the combinations built from it
	qui drop if country == ""
end

* Builds, per firm, year and country, the cumulative share of the firm's EP
* applications that entered the national phase in that country, and saves it
* as EP_grants_transfers_orbis2017_<wtype>.dta.

*load our firms and their applications
use ${dataset_dir}/patstat_orbis/Orbis_patents_list_2017_merged.dta, clear

*keep only European patents and the relevant attributes
keep if appln_auth=="EP"
keep BvD appln_year appln_id

*restrict to patents in relevant technical field
if "$wtype" != "all" {
	mmerge appln_id using ${dataset_dir}/patent_list/pats_${wtype}.dta, unmatched(none)
	drop _m
}

*merge in the country of the patent application (only applications found in the national-phase file are kept)
mmerge appln_id using ${commondata_dir}/patstat_2018b/PRS_EPO_national_phase.dta, unmatched(none) 
keep BvD appln_year country appln_id

*count rows per firm, year and country (nb_pat) and distinct applications per firm and year (nb_EPO_pat_granted)
gen byte x=1
bys BvD appln_year country : egen nb_pat = sum(x)
bys BvD appln_id: gen y = (_n==1)
bys BvD appln_year : egen nb_EPO_pat_granted = sum(y)
drop appln_id x y
duplicates drop

*complete set of firms: balance every firm over year x country; filled-in rows get zero counts
runby rectangularize, by(BvD) verbose
replace nb_pat = 0 if _fillin
replace nb_EPO_pat_granted = 0 if _fillin
*the firm-year count must be identical on every row of a firm-year, including the filled-in ones
bys BvD appln_year : egen nb_EPO_pat_granted_ = max(nb_EPO_pat_granted)
replace nb_EPO_pat_granted = nb_EPO_pat_granted_
drop nb_EPO_pat_granted_ _fillin

*keep the firm-year count on one row per firm-year only, so the running sum below counts each year once
bys BvD appln_year : gen y = (_n==1)
gen Ynb_EPO_pat_granted = nb_EPO_pat_granted*y
gen stock_EPO_pat_granted = Ynb_EPO_pat_granted

*generate the non-depreciating stock of EP applications of a firm and of its applications per country;
*only used temporarily here to build the share, not in the analysis
*NOTE(review): the running sum relies on the flagged row being the first row of each firm-year,
*i.e. on the bysort below leaving the row order of the previous bysort untouched - confirm
bysort BvD (appln_year) : replace stock_EPO_pat_granted=stock_EPO_pat_granted+stock_EPO_pat_granted[_n-1] if _n>1
gen stock_patents_country = nb_pat
bysort BvD country (appln_year) : replace stock_patents_country=stock_patents_country+stock_patents_country[_n-1] if _n>1

*generate share of patents in a country relative to the stock of EP applications
gen share_stock = stock_patents_country/stock_EPO_pat_granted
keep BvDIDnumber appln_year country share_stock
sort  BvDIDnumber appln_year country 
gen str2 appln_auth="EP"
ren country EPC_country
*NOTE(review): a missing share_stock (zero stock in the denominator) also passes this filter,
*because missing compares as larger than any number - confirm this is intended
keep if share_stock > 0

*labeling

compress
save ${dataset_dir}/weights/EP_grants_transfers_orbis2017_${wtype}.dta, replace


* c) Compute a firm's previous direct patenting in EP countries
* Output: EP_countries_direct_orbis2017_<wtype>.dta with one row per firm, year and
* EP country, holding the cumulative country share scaled by the yearly EPweight.

*get all EPO countries (distinct countries appearing in the national-phase file)
use ${commondata_dir}/patstat_2018b/PRS_EPO_national_phase.dta, clear
keep country
duplicates drop
tempfile EPcountries
save `EPcountries'

*load our firms and their applications
use ${dataset_dir}/patstat_orbis/Orbis_patents_list_2017_merged.dta, clear

*restrict to patents in relevant technical field
*(usually tfacit1, the standard version, or pauto95)
if "$wtype" != "all" {
	mmerge appln_id using ${dataset_dir}/patent_list/pats_${wtype}.dta, unmatched(none)
	drop _m
}

*keep only applications filed directly with an authority that is one of the EPO countries
ren appln_auth country
mmerge country using `EPcountries', unmatched(master)
keep if _m == 3
keep BvD appln_year appln_id country
*year 9999 is dropped here; presumably a placeholder for an unknown year - TODO confirm
keep if appln_year < 9999
gen byte x=1

*count patent applications of a firm in a country for a given year
bys BvD appln_year country : egen nb_pat = sum(x)
drop appln_id x
duplicates drop

*complete the set: balance every firm over year x country; filled-in rows get zero counts
runby rectangularize, by(BvD) verbose
replace nb_pat = 0 if _fillin
drop _fillin

*cumulative stock of a firm's patents per country, and the firm-year total over all countries
bys BvD country (appln_year): gen stock_patents_country = sum(nb_pat)
bys BvD appln_year: egen stock_all_patents = sum(stock_patents_country)

*generate share of patent filings by a company in a given EPO country
gen share_stock = stock_patents_country / stock_all_patents
keep BvD appln_year country share_stock
sort BvD appln_year country
ren country EPC_country
*NOTE(review): a missing share_stock (zero total stock) also passes this filter - confirm this is intended
keep if share_stock > 0

*scale the shares by the yearly EPweight so they are comparable with the EP transfer shares
mmerge appln_year using ${dataset_dir}/weights/EP_grants_transfers_average_EPweights_${wtype}.dta, unmatched(master)
drop _m
*the former max_share and DHweight helpers were computed and dropped without ever being used;
*they are removed, and an explicit sort keeps the rows ordered by firm and year as before
sort BvD appln_year EPC_country
replace share_stock = share_stock * EPweight
drop EPweight
gen str2 appln_auth = "EP"
compress

*labeling

save ${dataset_dir}/weights/EP_countries_direct_orbis2017_${wtype}.dta, replace


***********************************************************************************************************************
***	 PATENT PORTFOLIO using all patents
***	 ORBIS 2017
***********************************************************************************************************************
* Assigns every application of a firm to a destination authority. EP applications are
* resolved to countries in four steps of decreasing precision; each step sets a WD_ flag:
*   WD_EPtrans     country taken from the national-phase file
*   WD_EPimptrans  country shares taken from the firm's own EP transfer history
*   WD_EPimpdirect country shares taken from the firm's direct patenting in EP countries
*   WD_EPimpavg    country shares taken from the yearly average distribution
* The variable share (renamed x at the end) is the fraction of a patent assigned to the row.

* load our firms and their applications
use ${dataset_dir}/patstat_orbis/Orbis_patents_list_2017_merged.dta, clear
keep BvD appln_id

*restrict to patents in relevant technical field (tfacit1 or pauto95)
if "$wtype" != "all" {
	mmerge appln_id using ${dataset_dir}/patent_list/pats_${wtype}.dta, unmatched(none)
	drop _m
}
*get application authority, year and NUMBER (not ID) from the patstat application table
mmerge appln_id using ${commondata_dir}/patstat_2018b/appln_info.dta, unmatched(none) ukeep(appln_auth appln_nr appln_year)
drop _m

*correct GER/GDR and RUS/USSR (per the original note; the mapping itself lives in auth_map.do)
do ${code_dir}/config/auth_map.do appln_auth
cap drop docdb_family_id

*flag non-EP applications; for EP applications found in the national-phase file,
*replace the authority by the specific country
gen WD_nonEP = appln_auth != "EP"
mmerge appln_id using ${commondata_dir}/patstat_2018b/PRS_EPO_national_phase.dta, unmatched(master) 
replace appln_auth=country if _m==3
gen WD_EPtrans = _m == 3
drop country

*rows whose authority is still EP: distribute over countries using the firm's own EP transfer history
*(the using file carries appln_auth equal to EP, so only those rows can match)
gen share = 1
mmerge BvD appln_auth appln_year using ${dataset_dir}/weights/EP_grants_transfers_orbis2017_${wtype}.dta, unmatched(master)
replace appln_auth=EPC_country if _m==3
replace share = share_stock if _m == 3
gen WD_EPimptrans = _m == 3
drop EPC_country share_stock

*rows still at EP: distribute using the firm's direct patenting activity in EP countries
mmerge BvD appln_auth appln_year using ${dataset_dir}/weights/EP_countries_direct_orbis2017_${wtype}.dta, unmatched(master)
replace appln_auth=EPC_country if _m==3
gen WD_EPimpdirect = _m == 3
replace share = share_stock if _m == 3
drop EPC_country share_stock

*rows still at EP: fill in using the yearly average distribution
mmerge appln_auth appln_year using ${dataset_dir}/weights/EP_grants_transfers_average_${wtype}.dta, unmatched(master) ukeep(share_EPC_country EPC_country)
replace appln_auth=EPC_country if _m==3 
replace share = share_EPC_country if _m == 3
gen WD_EPimpavg = _m == 3
keep BvD appln_auth appln_year appln_nr share WD_*
duplicates drop
*x is the patent fraction carried by the row; WD_all flags every row so that
*the aggregation that follows can treat the total like any other category
ren share x
gen WD_all = 1

* Aggregate the patent fractions per firm, authority and year: once over all rows (all)
* and once per way in which the destination country was obtained
local sources all nonEP EPtrans EPimptrans EPimpdirect EPimpavg
foreach src of local sources {
	bysort BvD appln_auth appln_year : egen patents_`src' = total(x * WD_`src')
}
keep BvD appln_auth appln_year patents_*
duplicates drop

* Spread the authority-level counts into one column per country and category;
* every firm-year row then carries the firm-year value of each column
foreach iso of global countrylist_all {
	foreach src of local sources {
		* the counts over all rows are stored under the allsectors prefix
		local stub = cond("`src'" == "all", "allsectors", "`src'")
		bysort BvD appln_year : egen `stub'_pat_`iso' = total(patents_`src' * (appln_auth == "`iso'"))
	}
}

* Patents of a firm-year outside our country list = all authorities minus our countries
egen allsectors_pat_ourcountries = rowtotal(allsectors_pat_*)
bysort BvD appln_year : egen allsectors_pat_allcountries = total(patents_all)
gen other_allsectors_pat = allsectors_pat_allcountries - allsectors_pat_ourcountries
drop allsectors_pat_ourcountries allsectors_pat_allcountries
compress

* Reduce to one row per firm and year
drop appln_auth patents_*
duplicates drop
rename appln_year year

*labeling

save ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, replace



* careful: no data for ex-USSR countries pre-1990. their weight is 0 before 1990

**************************************************************************************************************************************
*	*Weight versions with 5 or 10 yr windows
**************************************************************************************************************************************

* Shares based on GDP (standard correction, exponent 0.35) with 5 yr window
* Output: a single row with share_GDP_<country> for every country in countrylist1995.
use ${dataset_dir}/indep_vars/gdp_wide.dta,clear
*keep the five years before 1995 (year >= 1990 and year < 1995)
drop if year >= 1995
drop if year < 1995-5
drop year

*get mean gdp of the time period per country
collapse (mean) gdp_*

*check that every country in our country list has GDP data
foreach ctry of global countrylist1995 {
	cap noi assert !missing(gdp_`ctry')
	local rc = _rc
	if `rc' != 0 {
		di "Missing GDP: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	*applying our standard GDP correction
	gen gdpw_`ctry' = gdp_`ctry'^.35
}

*generate total corrected GDP in our set of countries and calculate the part of each country
egen total_gdp=rowtotal(gdpw_??), missing
foreach ctry of global countrylist1995 {
	gen share_GDP_`ctry' = gdpw_`ctry' / total_gdp
}
keep sh*

*labeling

save ${dataset_dir}/weights/shares_GDP_allctries_1995.dta, replace


* Shares based on low-skilled compensation (only for 1995)
* Output: a single row with share_totlsw_<country> for every country in countrylist1995.
use ${dataset_dir}/indep_vars/totlsw_wide.dta, clear
keep if year == 1995
drop year

* check that every country in our set has a value (they should)
foreach ctry of global countrylist1995 {
	cap noi assert !missing(totlsw_`ctry')
	local rc = _rc
	if `rc' != 0 {
		*this check is about totlsw, so name it correctly in the message
		di "Missing totlsw: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	*applying our standard correction (same exponent as for GDP)
	gen totlsww_`ctry' = totlsw_`ctry'^.35
}

*generate the total corrected low-skilled compensation and calculate the part of each country
egen total_totlsw=rowtotal(totlsww_??), missing
foreach ctry of global countrylist1995 {
	gen share_totlsw_`ctry' = totlsww_`ctry' / total_totlsw
}
keep sh*

*labeling

save ${dataset_dir}/weights/shares_totlsw_allctries_1995.dta, replace


* Do the GDP 0 version (i.e. no gdp correction) with 5 yr window
* Output: a single row with share_GDP_<country>; all countries get the same share.
use ${dataset_dir}/indep_vars/gdp_wide.dta,clear
*keep the five years before 1995 (year >= 1990 and year < 1995)
drop if year >= 1995
drop if year < 1995-5
drop year
collapse (mean) gdp_*

*check that every country in our set has GDP data (they should)
foreach ctry of global countrylist1995 {
	cap noi assert !missing(gdp_`ctry')
	local rc = _rc
	if `rc' != 0 {
		di "Missing GDP: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	*exponent 0 turns every GDP value into 1, i.e. all countries are weighted equally
	gen gdpw_`ctry' = gdp_`ctry'^0.0
}
egen total_gdp=rowtotal(gdpw_??), missing

*all countries receive the same share of the total
foreach ctry of global countrylist1995 {
	gen share_GDP_`ctry' = gdpw_`ctry' / total_gdp
}
keep sh*

*labeling

save ${dataset_dir}/weights/shares_GDP_allctries_1995_GDP0.dta, replace



*GDP correction with exponent 1.0, i.e. shares directly proportional to GDP, with 5 yr window
* Output: a single row with share_GDP_<country> for every country in countrylist1995.
use ${dataset_dir}/indep_vars/gdp_wide.dta,clear
*keep the five years before 1995 (year >= 1990 and year < 1995)
drop if year >= 1995
drop if year < 1995-5
drop year

*get mean gdp of the time period per country
collapse (mean) gdp_*

*check that every country in our set has GDP data (they should)
foreach ctry of global countrylist1995 {
	cap noi assert !missing(gdp_`ctry')
	local rc = _rc
	if `rc' != 0 {
		di "Missing GDP: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	* exponent 1 leaves GDP unchanged; written like the other versions for readability
	gen gdpw_`ctry' = gdp_`ctry'^1.0
}

*generate share of total GDP of our country set
egen total_gdp=rowtotal(gdpw_??), missing
foreach ctry of global countrylist1995 {
	gen share_GDP_`ctry' = gdpw_`ctry' / total_gdp
}
keep sh*

*labeling

save ${dataset_dir}/weights/shares_GDP_allctries_1995_GDP1.dta, replace




* Shares patents Orbis 2017 with 10yr window
* Output: one row per firm with share2_all_1995_<country>, the GDP-corrected and
* normalised country weights built from the patents of year >= 1985 and year < 1995.
* (A load of the GDP share file that was immediately overwritten by the next use
*  has been removed; the shares are brought in by the cross further down.)

*load our patents
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
drop if year == 9999
drop if year >= 1995
drop if year < 1995-10
drop year

*generate total number of patents and total number of patents we imputed by year
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

*same thing over the entire period, and the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* total* other, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other/(other + totalpat)
drop total*

*cross with the GDP shares (a single row) to create GDP corrected values
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
foreach ctry of global countrylist1995 {
	gen share_pat_gdp_`ctry' = allsectors_pat_`ctry'*share_GDP_`ctry'
}

egen total_pat_gdp_wtd=rowtotal(share_pat_gdp_??), missing
* some shares are missing for all countries for some BVDIDs. eg for 1980 : 2100 missing over 53000 firms.
*drop firms whose weighted total is zero, firms with more than half of their patents imputed from the average,
*and firms with more than half of their patents outside our countrylist (we would not represent them correctly)
drop if total_pat_gdp_wtd == 0 | share_avgimp > 0.5 | other_share > 0.5

*normalize the shares so that the weights of a firm sum to one
foreach ctry of global countrylist1995 {
	gen share2_all_1995_`ctry' = share_pat_gdp_`ctry' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling

compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_10yr_1995_orbis2017_${wtype}.dta, replace
   

*******************************************************
* weight versions built on the long patent history
* (the filter keeps year >= 1971 and year < 1995; the file names say from1970)
*******************************************************
* Orbis 2017 patents, standard GDP correction (exponent 0.35)

use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the GDP shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
foreach c of global countrylist1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylist1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*
compress

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by GDP0.35"
}


save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_1995_orbis2017_${wtype}.dta, replace


* Same patent window (year >= 1971 and year < 1995), but weighted with the shares of
* total low-skilled compensation instead of the GDP shares
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the compensation shares to every firm and weight each country count with them
* (the variable names keep the gdp stub although the weights are compensation shares)
cross using ${dataset_dir}/weights/shares_totlsw_allctries_1995.dta
foreach c of global countrylist1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_totlsw_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylist1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by total low skill compensation"
}


compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_totlsw_1995_orbis2017_${wtype}.dta, replace
 

* Standard GDP correction, but the patent window ends before 1990
* (the filter keeps year >= 1971 and year < 1990; the file name says f1970t1989)
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995-5
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the GDP shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
foreach c of global countrylist1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylist1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by GDP0.35"
}

compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_f1970t1989_1995_orbis2017_${wtype}.dta, replace


* Standard patent window (year >= 1971 and year < 1995), but without GDP correction:
* the GDP0 share file gives every country the same share
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the equal-weight shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995_GDP0.dta
foreach c of global countrylist1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylist1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by equal GDP weights"
}


compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_GDP0_1995_orbis2017_${wtype}.dta, replace




* Standard patent window (year >= 1971 and year < 1995), weighted with shares that are
* directly proportional to GDP (the GDP1 share file)
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the GDP-proportional shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995_GDP1.dta
foreach c of global countrylist1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* open question kept from the original note: for firms without usable shares, replace with
* shareplus1 or weight further by the average worldwide distribution of patents?
* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylist1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by GDP"
}


compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_GDP1_1995_orbis2017_${wtype}.dta, replace



* ============= MINIMUM WAGE ===========
* GDP shares (standard correction, exponent 0.35) restricted to the country set
* countrylistMINW1995. Only the share file is built in this section.
* Output: a single row with share_GDP_<country> for every country in that list.

*load gdp data
use ${dataset_dir}/indep_vars/gdp_wide.dta,clear
*keep the five years before 1995 (year >= 1990 and year < 1995)
drop if year >= 1995
drop if year < 1995-5
drop year

*get mean gdp of the time period per country
collapse (mean) gdp_*

*check that every country in the set has GDP data (they should)
foreach ctry of global countrylistMINW1995 {
	cap noi assert !missing(gdp_`ctry')
	local rc = _rc
	if `rc' != 0 {
		di "Missing GDP: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	*apply standard correction
	gen gdpw_`ctry' = gdp_`ctry'^.35
}

*generate total corrected GDP in this set of countries and calculate the part of each country
egen total_gdp=rowtotal(gdpw_??), missing
foreach ctry of global countrylistMINW1995 {
	gen share_GDP_`ctry' = gdpw_`ctry' / total_gdp
}
keep sh*

*labeling

ds share_GDP*
local r: di r(varlist)
foreach var of local r { 
	label var `var' "Share of GDP in a country adjusted by GDP0.35 for MINW set"
}

compress
save ${dataset_dir}/weights/shares_GDP_allctries_forMINW_1995.dta, replace



* ============= LONG-TERM INTEREST RATE ===========
* GDP shares (standard correction, exponent 0.35) restricted to the country set
* countrylistLintr1995.
* Output: a single row with share_GDP_<country> for every country in that list.

*load gdp data
use ${dataset_dir}/indep_vars/gdp_wide.dta,clear
*keep the five years before 1995 (year >= 1990 and year < 1995)
drop if year >= 1995
drop if year < 1995-5
drop year

*get mean gdp of the time period per country
collapse (mean) gdp_*

*check that every country in the set has GDP data (they should)
foreach ctry of global countrylistLintr1995 {
	cap noi assert !missing(gdp_`ctry')
	local rc = _rc
	if `rc' != 0 {
		di "Missing GDP: `ctry'"
		*re-raise the failure: a bare exit returns code 0, so the problem would not reach
		*the status check at the end of this do-file, which reports the error and closes the log
		error `rc'
	}
	*apply standard correction
	gen gdpw_`ctry' = gdp_`ctry'^.35
}

*generate total corrected GDP in this set of countries and calculate the part of each country
egen total_gdp=rowtotal(gdpw_??), missing
foreach ctry of global countrylistLintr1995 {
	gen share_GDP_`ctry' = gdpw_`ctry' / total_gdp
}
keep sh*

*labeling

ds share_GDP*
local r: di r(varlist)
foreach var of local r { 
	label var `var' "Share of GDP in a country adjusted by GDP0.35 for Lintr set"
}



save ${dataset_dir}/weights/shares_GDP_allctries_forLintr_1995.dta, replace


* Firm weights for the Lintr country set, patent window year >= 1971 and year < 1995

use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the Lintr GDP shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_forLintr_1995.dta
foreach c of global countrylistLintr1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylistLintr1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*

*labeling
ds share*
foreach v in `r(varlist)' {
	label variable `v' "Share of patents in a country adjusted by GDP0.35 for Lintr set"
}

compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_forLintr_from1970_1995_orbis2017_${wtype}.dta, replace


* Lintr country set with the 10yr patent window (year >= 1985 and year < 1995)
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1995-10 & year < 1995
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the Lintr GDP shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_forLintr_1995.dta
foreach c of global countrylistLintr1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylistLintr1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}

keep BvD share2*
compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_forLintr_10yr_1995_orbis2017_${wtype}.dta, replace

* Lintr country set with the patent window ending before 1990
* (the filter keeps year >= 1971 and year < 1990; the file name says f1970t1989)
use ${dataset_dir}/weights/bvdid_allpats_EPOtransfers_Orbis2017_${wtype}.dta, clear
keep if year >= 1971 & year < 1995-5
drop year

* row-level totals: patents in our countries, and the part imputed from the EP average
egen totalpat = rowtotal(allsectors_pat_*)
egen totalavgimp = rowtotal(EPimpavg_pat_*)

* add everything up per firm, then the shares of imputed patents and of patents outside our countries
collapse (sum) allsectors_pat_* totalpat totalavgimp other_allsectors_pat, by(BvD)
gen share_avgimp = totalavgimp / totalpat
gen other_share = other_allsectors_pat / (other_allsectors_pat + totalpat)
drop totalpat totalavgimp

* attach the Lintr GDP shares to every firm and weight each country count with them
cross using ${dataset_dir}/weights/shares_GDP_allctries_forLintr_1995.dta
foreach c of global countrylistLintr1995 {
	gen share_pat_gdp_`c' = allsectors_pat_`c' * share_GDP_`c'
}
egen total_pat_gdp_wtd = rowtotal(share_pat_gdp_??), missing

* open question kept from the original note: for firms without usable shares, replace with
* shareplus1 or weight further by the average worldwide distribution of patents?
* keep firms with a non-zero weighted total, at most half of their patents imputed from the
* average, and at most half of their patents outside our country list
keep if total_pat_gdp_wtd != 0 & share_avgimp <= 0.5 & other_share <= 0.5

* normalise so that the weights of a firm sum to one
foreach c of global countrylistLintr1995 {
	gen share2_all_1995_`c' = share_pat_gdp_`c' / total_pat_gdp_wtd
}
keep BvD share2*
compress
save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_forLintr_f1970t1989_1995_orbis2017_${wtype}.dta, replace

}


***************************************
* Additional / robustness weights
***************************************
* Only built when the weight category is excluding and the patent-list type is tfacit1.
* For every country code listed in the global weight_versions, the baseline 1995 weights
* are rescaled over the remaining countries and saved to a separate file.

if "$weight_category" == "excluding" & "$wtype" == "tfacit1" {

	do ${code_dir}/config/country_list.do

	************************************************************
	* Excluding one country at a time
	************************************************************
	local base_weights ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_1995_orbis2017_${wtype}.dta

	foreach excl of global weight_versions {
		use `base_weights', clear

		* firms whose whole weight sits in the excluded country have nothing left to rescale
		drop if share2_all_1995_`excl' == 1

		* rescale every weight by the mass that remains once the excluded country is removed
		foreach c of global countrylist1995 {
			gen share2_all_excl`excl'_`c' = share2_all_1995_`c' / (1 - share2_all_1995_`excl')
		}

		* the excluded country itself gets weight zero
		replace share2_all_excl`excl'_`excl' = 0
		keep BvD share2_all_excl*
		save ${dataset_dir}/weights/bvdid_pat_weights_EPtr_excluding_`excl'_orbis2017_${wtype}.dta, replace
	}
}

}
* Report the return code of the captured block that ends on the line above
* (this must stay the first command after it), then close the log.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

capture log close dat